# ------------------------------------------------------------------------------------------------------------#  
# Replication code for: "Enforcing Compulsory Schooling by Linking Welfare Payments to School Attendance: 
#                        Lessons from Australia's Northern Territory"
#
# Author: Kyle Peyton (kyle.peyton@yale.edu)
#
# created 14 February 2014 
# updated 25 July 2014 
# ------------------------------------------------------------------------------------------------------------# 


require(foreign)
require(Synth)
require(kernlab)
require(reshape)
require(ggplot2)
require(scales)
require(ggthemes)
require(gridExtra)
require(MASS)
require(boot)
require(sandwich)
require(xtable)


####--------------Regression Analysis------------#####
naplan <- read.dta("naplanclean.dta")
indig <- naplan[naplan$indig==1 & naplan$jurisdiction %in% c("ACT","NSW","NT","QLD","SA","TAS","VIC","WA"),]

## Table 5: Indigenous outcomes, pooled difference-in-difference regressions (choose DV)
robustreg <- rlm(part_pct~nt+grade3+grade5+grade7+t2009+t2010+t2011+t2012+read+nt:t2009+nt:t2010+ 
                        nt:t2011+nt:t2012,data=indig)

fit <- fitted(robustreg)
e <- residuals(robustreg)
X <- model.matrix(robustreg)
boot.huber.fixed <- function(data,indices,maxit=20){
  y <- fit + e[indices]
  mod <- rlm(y~X-1,maxit=maxit)
  coefficients(mod)
}
fix.boot <- boot(indig,boot.huber.fixed,1999,maxit=100)
fix.boot

plot(fix.boot,index=11) ## examine distribution of treatment effect
boot.ci(fix.boot,index= 11,type = c("norm","perc","bca")) ## compare confidence intervals 

## compare to OLS w/ robust ses
lmfit <- lm(part_pct~nt+grade3+grade5+grade7+t2009+t2010+t2011+t2012+read+nt:t2009+nt:t2010+nt:t2011+nt:t2012,
            data=indig)
summary(lmfit)
diag(vcovHC(lmfit))


## Table 6: Regression estimates with Western Australia as a synthetic control (choose DV)
robustreg <- rlm(part_pct~nt+grade3+grade5+grade7+t2009+t2010+t2011+t2012+read+nt:t2009+nt:t2010+nt:t2011+nt:t2012,
                      data=indig[indig$jurisdiction %in% c("WA","NT"),])

fit <- fitted(robustreg)
e <- residuals(robustreg)
X <- model.matrix(robustreg)
boot.huber.fixed <- function(data,indices,maxit=20){
  y <- fit + e[indices]
  mod <- rlm(y~X-1,maxit=maxit)
  coefficients(mod)
}
fix.boot <- boot(indig[indig$jurisdiction %in% c("WA","NT"),],boot.huber.fixed,1999,maxit=100)
fix.boot

plot(fix.boot,index=11) ## examine distribution of treatment effect
boot.ci(fix.boot,index= 11,type = c("norm","perc","bca")) ## compare confidence intervals 

## compare to OLS w/ robust ses
lmfit <- lm(part_pct~nt+grade3+grade5+grade7+t2009+t2010+t2011+t2012+read+nt:t2009+nt:t2010+nt:t2011+nt:t2012,
            data=indig[indig$jurisdiction %in% c("WA","NT"),])
summary(lmfit)
diag(vcovHC(lmfit))


####--------Synthetic control placebo tests------#####

indig <- naplan[naplan$indig==1 & naplan$jurisdiction %in% c("ACT","NSW","NT","QLD","SA","TAS","VIC","WA"),]

listout <- list(year3math=NA,year5math=NA,year7math=NA,year9math=NA,year3read=NA,year5read=NA,year7read=NA,year9read=NA)
tableout <- list(year3math=NA,year5math=NA,year7math=NA,year9math=NA,year3read=NA,year5read=NA,year7read=NA,year9read=NA)
plotout <- list(year3math=NA,year5math=NA,year7math=NA,year9math=NA,year3read=NA,year5read=NA,year7read=NA,year9read=NA)

indig1 <- indig[indig$grade==3 & indig$domain=="math",]
indig2 <- indig[indig$grade==5 & indig$domain=="math",]
indig3 <- indig[indig$grade==7 & indig$domain=="math",]
indig4 <- indig[indig$grade==9 & indig$domain=="math",]

indig5 <- indig[indig$grade==3 & indig$domain=="read",]
indig6 <- indig[indig$grade==5 & indig$domain=="read",]
indig7 <- indig[indig$grade==7 & indig$domain=="read",]
indig8 <- indig[indig$grade==9 & indig$domain=="read",]

indiglist <- list(indig1,indig2,indig3,indig4,indig5,indig6,indig7,indig8)

for(i in 1:length(indiglist)) {
  
## Step 1: dataprep 
dataprep.out <- dataprep(
  foo = indiglist[[i]],
  predictors = c("abovemin","beta","passrate"),
  predictors.op = "mean",
  time.predictors.prior = 2008,
  special.predictors = list(list("part_pct", 2008, "mean")),
  dependent = "part_pct",
  unit.names.variable = "jurisdiction",
  unit.variable = "jurisno",
  time.variable = "year",
  treatment.identifier = 3,
  controls.identifier = c(1:2,4:8),
  time.optimize.ssr = 2008,
  time.plot = 2008:2012)

## run optimization procedure to allocate weights 
synth.out <- synth(data.prep.obj = dataprep.out, method = "BFGS")

## examine annual discrepancies between NT and synthetic counterpart 
gaps.nt <- dataprep.out$Y1plot - (dataprep.out$Y0 %*% synth.out$solution.w)

##summary tables and figures
synth.tables <- synth.tab(dataprep.res = dataprep.out, synth.res = synth.out)

##compare pre-treatment predictors 
tableout[[i]] <- synth.tables$tab.pred

### placebo tests via for loop , 
placebos <- c(1,2,4,5,6,7,8)
gaps <- matrix(NA,5,7)  

for (j in 1: length(placebos)) {
  treatment <- placebos[j]
  controls <- placebos[which(placebos != treatment)]

dataprep.out <- dataprep(
  foo = indiglist[[i]],
  predictors = c("abovemin","beta","passrate"),
  predictors.op = "mean",
  time.predictors.prior = 2008,
  special.predictors = list(list("part_pct", 2008, "mean")),
  dependent = "part_pct",
  unit.names.variable = "jurisdiction",
  unit.variable = "jurisno", 
  time.variable = "year",
  treatment.identifier = treatment,
  controls.identifier = controls,
  time.optimize.ssr = 2008,
  time.plot = 2008:2012)
synth.out <- synth(data.prep.obj = dataprep.out, method = "BFGS")

  gaps[,j] <- dataprep.out$Y1plot-(dataprep.out$Y0plot%*%synth.out$solution.w) 
  
}

colnames(gaps) <- c("ACT","NSW","QLD","SA","TAS","VIC","WA")
gaps <- data.frame(gaps)  

## add back in NT stats 
gaps$NT <- as.numeric(gaps.nt[,1])
gaps$year <- seq(2008,2012,1)

##reorganize data for ggplot
gaps <- melt(gaps, id=c("year"))
colnames(gaps) <- c("year","juris","gap")
gaps$nt <- ifelse(gaps$juris == "NT",1,0)

## store data for plots in list 
listout[[i]] <- gaps 

}

placeboresults <- list(listout)

####------Extract Results for Tables 7 & 8-------#####

## Note: must change instances of "part_pct" to "abovemin" 
#        and run synthetic control algorithm again for 
#        abovemin (Table 8)

placebotable <- data.frame(matrix(NA,8,8))

for (k in 1:ncol(placebotable)) {
  temp <- data.frame(placeboresults[[1]][k]) 
  colnames(temp) <- c("year","juris","gap","nt")
placebotable[,k] <- (temp[temp$year == 2009,]$gap - temp[temp$year == 2008,]$gap)*100

}

rownames(placebotable) <- unique(temp$juris)
colnames(placebotable) <- plotlist
xtable(placebotable)





